Exploring performance#
In this demo, we analyse a full Carnatic music performance end to end, using the tools available in compIAM for source separation, pitch extraction, and raga recognition.
## Installing (if not already installed) and importing compIAM and its dependencies
# Install any dependency that is missing from the current environment.
# (%pip is an IPython line magic: it installs into the running kernel.)
import importlib.util

if importlib.util.find_spec('compiam') is None:
    %pip install compiam

if importlib.util.find_spec('essentia') is None:
    %pip install essentia

# NOTE: torch/tensorflow versions are pinned — presumably the versions the
# pre-trained compIAM models were exported with; "keras<3" keeps the
# legacy Keras API that TF 2.15 models expect.
if importlib.util.find_spec('torch') is None:
    %pip install "torch==1.13"

if importlib.util.find_spec('tensorflow') is None:
    %pip install "tensorflow==2.15.0" "keras<3"
import compiam
import essentia.standard as estd

# Import extras and suppress warnings to keep the tutorial clean
import os
import gdown
import zipfile
import numpy as np
import IPython.display as ipd
from pprint import pprint

import warnings
warnings.filterwarnings('ignore')  # hide library warnings in the notebook output

# Folder where the demo audio lives, and the artist/concert folder
# we will download and analyse below.
AUDIO_PATH = os.path.join("..", "audio", "demos")
ARTIST = "dr-brindha-manickavasakan"
[ INFO ] MusicExtractorSVM: no classifier models were configured by default
We will work on a concert led by Dr. Brindha Manickavasakan, a well-known Carnatic music performer and doctor who has been closely involved in our research efforts. This concert took place during the December Season 2023 in Chennai, India, at the well-known Arkay Convention Centre. Please note that this particular recording is part of the newly published Saraga Audiovisual Dataset (A. Shankar et al., 2024), which will soon be available for access through mirdata and compIAM.
For now, we will download this particular concert and explore a given rendition.
# Google Drive link to the zipped concert recordings (see the Saraga
# Audiovisual Dataset note above); gdown saves the archive into AUDIO_PATH.
url = "https://drive.google.com/uc?id=1iR0bfxDLQbH8fEeHU_GFsg2kh7brZ0HZ&export=download"
output = os.path.join(AUDIO_PATH, "dr-brindha-manickavasakan.zip")
gdown.download(url, output, quiet=False)
Once the audio is downloaded, we can extract all the files and remove the .zip file.
# Unpack the downloaded archive into the demo audio folder, then remove
# the archive itself — only the extracted files are needed from here on.
with zipfile.ZipFile(output) as concert_archive:
    concert_archive.extractall(AUDIO_PATH)
os.remove(output)
Loading and visualising the data#
# Pick one rendition (song) from the concert to analyse for the rest of
# the notebook; this is also the name of its folder and .wav file.
rendition = "Bhavanuta" # Selecting a rendition
We use Essentia to first load the mixture audio of the concert. The function AudioLoader can be used to load an audio signal from a file path and return the signal plus some important technical information about it.
# Path to the mixture recording of the selected rendition.
file_path = os.path.join(AUDIO_PATH, ARTIST, rendition, rendition + ".wav")
# AudioLoader returns a tuple (audio plus technical metadata such as the
# sample rate and number of channels); we only keep the audio samples here.
audio_mix, _, _, _, _, _ = estd.AudioLoader(filename=file_path)()
audio_mix = audio_mix.T  # Put channels first
Let’s quickly listen to 30 seconds of this incredible performance!
# Play the first 30 seconds of the mixture (assumes 44.1 kHz — TODO confirm
# against the sample rate returned by AudioLoader above).
ipd.Audio(audio_mix[..., :44100*30], rate=44100)
Low level feature extraction#
Tonic Identification#
Music Source Separation#
Before extracting melodic features, we isolate the singing voice from the accompaniment using a source separation model trained for Carnatic music, available through compIAM.
from compiam import load_model

# This model uses tensorflow in the backend!
# Load the pre-trained separation model and isolate the singing voice
# from the stereo concert mixture.
separation_model = load_model("separation:cold-diff-sep")
separated_vocals = separation_model.separate(audio_mix)

# Inspect the shape of the separated signal (a mono sample vector).
separated_vocals.shape
(11343744,)
# Listen to the first 30 seconds of the separated vocals.
ipd.Audio(separated_vocals[..., :44100*30], rate=44100)
For further reference, please visit the music source separation page.
Pitch Extraction#
from compiam import load_model

# This model uses tensorflow in the backend!
# Importing and initializing again a melodia instance
### Salamon et al. 2012
from compiam.melody.pitch_extraction import Melodia

melodia = Melodia()

# Importing also a DL model (FTANet trained on Carnatic music) to
# extract the melody
### Plaja-Roglans et al. 2023
ftanet_carnatic = load_model("melody:ftanet-carnatic")
Predict the melody using both methods.
# Extract the melody with both methods.
#
# NOTE(review): passing the in-memory `audio_mix` array crashes here — the
# installed compiam's ndarray branch invokes Essentia's Resample with no
# input ("Resample.compute requires 1 argument(s), 0 given"), and the array
# is stereo anyway while the extractors expect mono. Passing the file path
# instead lets each extractor load (and mono-downmix/resample) the audio
# itself, which is the supported input form.
melodia_pitch_track = melodia.extract(file_path)
ftanet_pitch_track = ftanet_carnatic.predict(
    file_path,
    out_step=melodia_pitch_track[1, 0],  # Interpolating to same size
)
[2024-11-28 12:48:26,594] WARNING [compiam.melody.pitch_extraction.melodia.extract:90] Resampling... (input sampling rate is 44100Hz, make sure this is correct)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In [12], line 1
----> 1 melodia_pitch_track = melodia.extract(audio_mix)
2 ftanet_pitch_track = ftanet_carnatic.predict(
3 audio_mix,
4 out_step=melodia_pitch_track[1, 0], # Interpolating to same size
5 )
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/compiam/melody/pitch_extraction/melodia.py:93, in Melodia.extract(self, input_data, input_sr, out_step)
89 elif isinstance(input_data, np.ndarray):
90 logger.warning(
91 f"Resampling... (input sampling rate is {input_sr}Hz, make sure this is correct)"
92 )
---> 93 resample_audio = estd.Resample(
94 inputSampleRate=input_sr, outputSampleRate=self.sample_rate
95 )()
96 input_data = resample_audio(input_data)
97 audio = estd.EqualLoudness(signal=input_data)()
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:123, in _create_essentia_class.<locals>.Algo.__call__(self, *args)
122 def __call__(self, *args):
--> 123 return self.compute(*args)
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:70, in _create_essentia_class.<locals>.Algo.compute(self, *args)
67 inputNames = self.inputNames()
69 if len(args) != len(inputNames):
---> 70 raise ValueError(name+'.compute requires '+str(len(inputNames))+' argument(s), '+str(len(args))+' given')
72 # we have to make some exceptions for YamlOutput and PoolAggregator
73 # because they expect cpp Pools
74 if name in ('YamlOutput', 'PoolAggregator', 'SvmClassifier', 'PCA', 'GaiaTransform', 'TensorflowPredict'):
ValueError: Resample.compute requires 1 argument(s), 0 given
Let’s visualize from sec. 4 to sec. 10 of the performance, together with the predicted pitch tracks using both methods.
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

# Load the rendition audio for the spectrogram background. The original
# cell referenced an undefined `example` object (NameError); we reuse the
# `file_path` built earlier for this rendition instead.
y, sr = librosa.load(file_path)

# Log-magnitude spectrogram, shown from sec. 4 to sec. 10 and up to 2 kHz
# so the melodic range is clearly visible.
fig, ax = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(15, 12))
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
img = librosa.display.specshow(D, y_axis='linear', x_axis='time', sr=sr, ax=ax)
ax.set_ylim(0, 2000)
ax.set_xlim(4, 10)

# Overlay both predicted pitch tracks (columns: time, frequency).
plt.plot(
    melodia_pitch_track[:, 0], melodia_pitch_track[:, 1],
    color="white", label="Melodia",
)
plt.plot(
    ftanet_pitch_track[:, 0], ftanet_pitch_track[:, 1],
    color="black", label="FTANet-Carnatic",
)
plt.legend()
plt.show()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In [13], line 6
3 import numpy as np
4 import matplotlib.pyplot as plt
----> 6 y, sr = librosa.load(example.audio_path)
7 fig, ax = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(15, 12))
8 D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
NameError: name 'example' is not defined
For further reference, please visit the pitch extraction page.
Percussion onset detection#
High level feature extraction#
Melodic pattern discovery#
Raga recognition#
from compiam import load_model

# This model uses tensorflow in the backend!
deepsrgm = load_model("melody:deepsrgm")

# DEEPSRGM's feature extractor expects a mono sample vector; the stereo
# channels-first `audio_mix` makes Essentia fail with "Cannot convert
# MATRIX_REAL to VECTOR_REAL". Downmix to mono before extracting features.
feat = deepsrgm.get_features(np.mean(audio_mix, axis=0))
[2024-11-28 12:48:28,017] WARNING [compiam.melody.raga_recognition.deepsrgm.get_features:242] Resampling... (input sampling rate is {input_sr}Hz, make sure this is correct)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:97, in _create_essentia_class.<locals>.Algo.compute(self, *args)
96 try:
---> 97 convertedData = _c.convertData(arg, goalType)
98 except TypeError:
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/common.py:349, in convertData(data, goalType)
347 return [[col for col in row] for row in data]
--> 349 raise TypeError('Cannot convert data from type %s (%s) to type %s' %
350 (str(origType), str(type(data)), str(goalType)))
TypeError: Cannot convert data from type MATRIX_REAL (<class 'numpy.ndarray'>) to type VECTOR_REAL
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Cell In [15], line 1
----> 1 feat = deepsrgm.get_features(audio_mix)
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/compiam/melody/raga_recognition/deepsrgm/__init__.py:248, in DEEPSRGM.get_features(self, input_data, input_sr, pitch_path, tonic_path, from_mirdata, track_id, k)
242 logger.warning(
243 "Resampling... (input sampling rate is {input_sr}Hz, make sure this is correct)"
244 )
245 resampling = estd.Resample(
246 inputSampleRate=input_sr, outputSampleRate=self.sample_rate
247 )
--> 248 audio = resampling(input_data)
249 else:
250 raise ValueError("Input must be path to audio signal or an audio array")
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:123, in _create_essentia_class.<locals>.Algo.__call__(self, *args)
122 def __call__(self, *args):
--> 123 return self.compute(*args)
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:99, in _create_essentia_class.<locals>.Algo.compute(self, *args)
97 convertedData = _c.convertData(arg, goalType)
98 except TypeError:
---> 99 raise TypeError('Error cannot convert argument %s to %s' \
100 %(str(_c.determineEdt(arg)), str(goalType)))
102 convertedArgs.append(convertedData)
104 results = self.__compute__(*convertedArgs)
TypeError: Error cannot convert argument MATRIX_REAL to VECTOR_REAL
# Run the trained DEEPSRGM model on the extracted features to obtain the
# predicted raga (a class index) for the rendition.
predicted_raga = deepsrgm.predict(feat)
predicted_raga
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In [16], line 1
----> 1 predicted_raga = deepsrgm.predict(feat)
2 predicted_raga
NameError: name 'feat' is not defined
# Translate the predicted class index into a human-readable raga name.
# NOTE(review): the traceback below shows `raga_mapping` missing on this
# DEEPSRGM instance — presumably because the earlier cells failed; verify
# the attribute exists once feature extraction succeeds.
deepsrgm.raga_mapping[predicted_raga]
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In [17], line 1
----> 1 deepsrgm.raga_mapping[predicted_raga]
AttributeError: 'DEEPSRGM' object has no attribute 'raga_mapping'